0: Preparation

Defining the input and output files

Loading libraries

if (!require(knitr)) install.packages("knitr", dependencies = TRUE)
## Loading required package: knitr
library(knitr)

if (!require(ggplot2)) install.packages("ggplot2", dependencies = TRUE)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
library(ggplot2)

if (!require(scatterplot3d)) install.packages("scatterplot3d", dependencies = TRUE)
## Loading required package: scatterplot3d
## Warning: package 'scatterplot3d' was built under R version 4.3.1
library(scatterplot3d) # For generating the 3d plots 

if (!require(RColorBrewer)) install.packages("RColorBrewer", dependencies = TRUE)
## Loading required package: RColorBrewer
library(RColorBrewer) # For generating a color palette

Latex formatting function

Standard Error Confidence interval function

Confidence level <=> konfidensgrad

# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
  # if ( nrow(observed_data) > 1 ) {
  
  if ( length(observed_data) > 1 ) {
      
  # Calculate standard error
  n <- length(observed_data)
  standard_deviation <- sd(observed_data)
  standard_error <- standard_deviation / sqrt(n - 1)
  
  # Calculate confidence interval based on standard error

  alpha <- (1 - confidence_level) / 2
  margin_of_error <- qnorm(1 - alpha) * standard_error
  mean_estimate <- mean(observed_data)
  # Calculate the percentiles for the confidence interval
  confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
  confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
  
  # Return confidence interval
  return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))

    
  } else {
    return(c(NA,NA)) 
    }
  
  
} 





# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#   
#   # Function to calculate the mean for each bootstrap sample
#   compute_bootstrap_mean_fun <- function(observed_data_urn) {
#     bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
#     bootstrap_estimate <- mean(bootstrap_dataset)
#     return(bootstrap_estimate)
#   }
#   
#   # Perform bootstrap resampling
#   bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#   
#   # Calculate the percentiles for the confidence interval
#   alpha <- (1 - confidence_level) / 2
#   confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
#   confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#   
#   # Return the confidence interval
#   return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }

Causative variant

Variant fixation

Fixation probability

Fixation time

# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
  # Find the index of the first row where the allele frequency is 0.9 or higher
  fixation_index <- which(data$allele_frequency >= threshold)[1]
  
  # Return the number of rows until fixation is reached
  return(fixation_index - 1)
}

Summary - Variant fixation

## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Causative_variant_Fixation_summary.txt"
Selection_coefficient Fixation_probability Avg_Fixation_time Min_fixation_time Max_fixation_time
s=0.1 1.0 55 45 64
s=0.2 41.7 46 37 58
s=0.3 57.1 33 23 40
s=0.4 76.9 29 21 37
s=0.5 69.0 25 20 31
s=0.6 74.1 20 17 23
s=0.7 62.5 19 16 24
s=0.8 83.3 17 14 21

Causative variant windows

Variant position

Window lengths

Causative variant windows

Expected Heterozygosity

## Point estimate H_e across all 20 simulations for  s01_chr9 : 0.1409224 //n[1] "Bootstrap 95% Confidence Interval: [0.0925741889802892, 0.189270649535389]"
## Point estimate H_e across all 20 simulations for  s02_chr9 : 0.1468773 //n[1] "Bootstrap 95% Confidence Interval: [0.112635475455286, 0.181119085299917]"
## Point estimate H_e across all 20 simulations for  s03_chr9 : 0.1029798 //n[1] "Bootstrap 95% Confidence Interval: [0.0516602472515632, 0.154299390835809]"
## Point estimate H_e across all 20 simulations for  s04_chr9 : 0.1187659 //n[1] "Bootstrap 95% Confidence Interval: [0.078310650686274, 0.159221155639888]"
## Point estimate H_e across all 20 simulations for  s05_chr9 : 0.1375201 //n[1] "Bootstrap 95% Confidence Interval: [0.0970728581788342, 0.17796743001132]"
## Point estimate H_e across all 20 simulations for  s06_chr9 : 0.07875692 //n[1] "Bootstrap 95% Confidence Interval: [0.0561528857574954, 0.101360963717184]"
## Point estimate H_e across all 20 simulations for  s07_chr9 : 0.1270691 //n[1] "Bootstrap 95% Confidence Interval: [0.068746299435772, 0.185391996295458]"
## Point estimate H_e across all 20 simulations for  s08_chr9 : 0.1242718 //n[1] "Bootstrap 95% Confidence Interval: [0.0746607385549174, 0.173882852083258]"

Summary - Causative variant windows

## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Causative_variant_windows_summary.txt"
Sel.coeff Length_Mb Length_lower_CI Length_Upper_CI ROH_freq ROH_freq_lower_CI ROH_freq_upper_CI Min_freq Max_freq variant_subwindow_freq variant_subwindow_freq_lower_CI variant_subwindow_freq_upper_CI Avg_H_e H_e_lower_CI H_e_upper_CI
s=0.1 0.72 0.55 0.90 77.2 68.4 86.0 23.3 99.5 78.9 70.3 87.5 0.141 0.093 0.189
s=0.2 1.01 0.77 1.25 76.6 69.1 84.1 28.8 100.0 78.3 70.4 86.1 0.147 0.113 0.181
s=0.3 1.26 1.00 1.53 90.9 84.6 97.2 37.7 100.0 91.8 85.5 98.1 0.103 0.052 0.154
s=0.4 1.15 0.79 1.50 89.8 85.0 94.6 60.5 100.0 91.0 86.1 95.9 0.119 0.078 0.159
s=0.5 1.38 1.02 1.74 92.0 86.6 97.4 43.3 100.0 92.8 87.5 98.1 0.138 0.097 0.178
s=0.6 2.05 1.57 2.53 97.0 94.9 99.1 75.3 100.0 97.9 95.7 100.1 0.079 0.056 0.101
s=0.7 2.10 1.54 2.67 93.7 87.1 100.4 34.0 100.0 94.5 88.1 100.9 0.127 0.069 0.185
s=0.8 2.00 1.56 2.45 94.4 90.4 98.4 61.4 100.0 95.3 91.6 99.1 0.124 0.075 0.174

1: ROH-Frequency

1.1 : Autosome ROH-frequencies

1.1.1 : Empirical - ROH frequency

1.1.2: Neutral Model - ROH frequency

1.1.3: Selection Model

1.1.4 Summary - ROH-hotspot threshold

## ROH-hotspot selection testing results://n
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/ROH-hotspot_threshold_comparison.txt"
Model Avg_ROH_hotspot_threshold Lower_CI Upper_CI
Neutral 51.6 44.4 58.8
Empirical 55.4 NA NA
s=0.2 77.6 70.1 85.1
s=0.1 80.3 75.1 85.5
s=0.4 92.3 88.0 96.6
s=0.3 92.9 88.0 97.8
s=0.5 94.9 90.3 99.6
s=0.7 96.5 93.2 99.8
s=0.8 98.1 96.3 99.9
s=0.6 98.4 96.6 100.2

1.2 ROH-hotspots - ROH Frequency and Length

2: Inbreeding coefficient

2.1 Empirical data

## Overall Average Avg_F_ROH for  labrador_retriever : 0.2333818 //n

2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.2314507 //n
## [1] "Bootstrap 95% Confidence Interval: [0.219002752647896, 0.243898726421872]"

2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for  selection_model_s01_chr9 : 0.2656486 //n[1] "Bootstrap 95% Confidence Interval: [0.252240623253689, 0.279056655816078]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s02_chr9 : 0.2777609 //n[1] "Bootstrap 95% Confidence Interval: [0.261878746605762, 0.29364312316168]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s03_chr9 : 0.3017125 //n[1] "Bootstrap 95% Confidence Interval: [0.284779672093352, 0.31864533255781]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s04_chr9 : 0.3109797 //n[1] "Bootstrap 95% Confidence Interval: [0.298039216034974, 0.323920221174329]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s05_chr9 : 0.3077886 //n[1] "Bootstrap 95% Confidence Interval: [0.28852077497684, 0.32705650874409]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s06_chr9 : 0.3611243 //n[1] "Bootstrap 95% Confidence Interval: [0.342370560473491, 0.379877960456742]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s07_chr9 : 0.3600298 //n[1] "Bootstrap 95% Confidence Interval: [0.343113084116087, 0.376946436814146]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s08_chr9 : 0.3408198 //n[1] "Bootstrap 95% Confidence Interval: [0.327834315751562, 0.353805223783321]"

2.4 F_ROH summary

## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/F_ROH_comparison.txt"
Model F_ROH Lower_CI Upper_CI
Neutral 0.231 0.219 0.244
Empirical 0.233 NA NA
s=0.1 0.266 0.252 0.279
s=0.2 0.278 0.262 0.294
s=0.3 0.302 0.285 0.319
s=0.5 0.308 0.289 0.327
s=0.4 0.311 0.298 0.324
s=0.8 0.341 0.328 0.354
s=0.7 0.360 0.343 0.377
s=0.6 0.361 0.342 0.380

3: Expected Heterozygosity

3.1 Empirical data

3.2 Neutral Model

3.3 Selection Model

3.4 Genomewide Average H_e

Model H_e Lower_CI Upper_CI
s=0.7 0.310 0.305 0.315
s=0.6 0.311 0.305 0.316
s=0.4 0.313 0.308 0.318
s=0.8 0.316 0.310 0.321
Empirical 0.316 NA NA
s=0.1 0.317 0.312 0.322
s=0.2 0.319 0.314 0.325
s=0.5 0.320 0.314 0.327
s=0.3 0.322 0.316 0.327
Neutral 0.330 0.325 0.335

3.5 Genomewide 5th percentile comparison - Expected Heterozygosity Summary

## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Expected_Heterozygosity_Summary.txt"
Model H_e_5th_percentile Lower_CI Upper_CI
s=0.6 0.119 0.108 0.130
s=0.7 0.120 0.109 0.131
s=0.8 0.131 0.120 0.142
s=0.4 0.132 0.119 0.144
s=0.5 0.136 0.122 0.151
s=0.1 0.139 0.130 0.148
s=0.2 0.139 0.126 0.152
s=0.3 0.142 0.128 0.156
Empirical 0.151 NA NA
Neutral 0.179 0.170 0.187

4: Summary

4.0 General comparison

4.0.1 ROH-hotspot threshold comparison

## 
##  ROH-hotspot threshold comparison between the different datasets
Model Avg_ROH_hotspot_threshold Lower_CI Upper_CI
Neutral 51.6 44.4 58.8
Empirical 55.4 NA NA
s=0.2 77.6 70.1 85.1
s=0.1 80.3 75.1 85.5
s=0.4 92.3 88.0 96.6
s=0.3 92.9 88.0 97.8
s=0.5 94.9 90.3 99.6
s=0.7 96.5 93.2 99.8
s=0.8 98.1 96.3 99.9
s=0.6 98.4 96.6 100.2

4.0.2 F_ROH comparison

## Models where empirical F_ROH is within CI:
## [1] "Neutral"
## 
## Models where empirical F_ROH is outside CI:
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## png 
##   2
Model F_ROH Lower_CI Upper_CI
Neutral 0.231 0.219 0.244
Empirical 0.233 NA NA
s=0.1 0.266 0.252 0.279
s=0.2 0.278 0.262 0.294
s=0.3 0.302 0.285 0.319
s=0.5 0.308 0.289 0.327
s=0.4 0.311 0.298 0.324
s=0.8 0.341 0.328 0.354
s=0.7 0.360 0.343 0.377
s=0.6 0.361 0.342 0.380

4.1: Hotspot comparison

4.1.1: Selection test (Sweep test)

## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.16993"
Name Under_selection Window_based_Average_H_e
hotspot_chr28_window_1 Yes 0.118
hotspot_chr1_window_1 Yes 0.144
hotspot_chr24_window_1 No 0.186
hotspot_chr13_window_1 No 0.190
hotspot_chr11_window_2 No 0.205
hotspot_chr15_window_1 No 0.223
hotspot_chr6_window_1 No 0.232
hotspot_chr11_window_1 No 0.233
hotspot_chr8_window_1 No 0.307
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/ROH_hotspots_Selection_testing_neutral_model_H_E_threshold_0.17.csv.txt"
## [1] "ROH-hotspots under selection:"
Name Under_selection Window_based_Average_H_e
hotspot_chr28_window_1 Yes 0.118
hotspot_chr1_window_1 Yes 0.144

4.1.2: Selection Strength Test (Sweep test)

4.1.2.1 Extracting Causative windows under selection

## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Causative_windows_under_selection.txt"
Model H_e Lower_CI Upper_CI Under_Selection
s=0.6 0.079 0.056 0.101 Yes
s=0.3 0.103 0.052 0.154 Yes
s=0.4 0.119 0.078 0.159 Yes
s=0.8 0.124 0.075 0.174 Yes
s=0.7 0.127 0.069 0.185 Yes
s=0.5 0.138 0.097 0.178 Yes
s=0.1 0.141 0.093 0.189 Yes
s=0.2 0.147 0.113 0.181 Yes
Neutral 0.179 0.170 0.187 No

4.1.2.3 Extracting Hotspots under selection

Hotspot - Causative Window - Comparison

Model Lengths_Mb Length_lower_ci Length_upper_ci ROH_Freq ROH_freq_lower_ci ROH_freq_upper_ci H_e H_e_lower_ci H_e_upper_ci
s=0.8 2.00 1.56 2.45 94.4 90.4 98.4 0.124 0.075 0.174
s=0.7 2.10 1.54 2.67 93.7 87.1 100.4 0.127 0.069 0.185
s=0.6 2.05 1.57 2.53 97.0 94.9 99.1 0.079 0.056 0.101
s=0.5 1.38 1.02 1.74 92.0 86.6 97.4 0.138 0.097 0.178
s=0.4 1.15 0.79 1.50 89.8 85.0 94.6 0.119 0.078 0.159
s=0.3 1.26 1.00 1.53 90.9 84.6 97.2 0.103 0.052 0.154
s=0.2 1.01 0.77 1.25 76.6 69.1 84.1 0.147 0.113 0.181
s=0.1 0.72 0.55 0.90 77.2 68.4 86.0 0.141 0.093 0.189
Hotspot_chr28_window_1 1.40 NA NA 75.0 NA NA 0.118 NA NA
Hotspot_chr1_window_1 0.70 NA NA 56.2 NA NA 0.144 NA NA

Hotspot, Causativa window - 3D plot-Comparison

setwd(output_dir)

# plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
plot_title <- paste("ROH Hotspot(s) & Causative Windows comparison")

# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  Hotspots_and_Causative_windows_comparison_sorted$Model
)

# Removing the "s=" part from the selection coefficients for the plot display
Hotspots_and_Causative_windows_comparison_sorted$Label <- gsub("^s=", "", Hotspots_and_Causative_windows_comparison_sorted$Label)

# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  "Hotspot", 
  "Selection Coefficient"
)

# Generate a color palette for the hotspots
hotspot_models <- unique(Hotspots_and_Causative_windows_comparison_sorted$Model[Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot"])
num_hotspots <- length(hotspot_models)

# Choose the color palette based on the number of hotspots
if (num_hotspots == 2) {
  color_palette_name <- "Set2"
} else {
  color_palette_name <- "Set3"
}

# Get the colors for the hotspots
hotspot_colors <- setNames(brewer.pal(n = num_hotspots, name = color_palette_name), hotspot_models)
## Warning in brewer.pal(n = num_hotspots, name = color_palette_name): minimal value for n is 3, returning requested palette with 3 different levels
# Assign colors to each point
Hotspots_and_Causative_windows_comparison_sorted$Color <- ifelse(
  Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", 
  hotspot_colors[Hotspots_and_Causative_windows_comparison_sorted$Model], 
  "blue"
)

x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
y_axis_label <- "Length (Mb)"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
z_axis_label <- "Avg H_e"


# x_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# x_axis_label <- "Length (Mb)"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# y_axis_label <- "Avg H_e"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# z_axis_label <- "Avg ROH-frequency (%)"


x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
y_axis_label <- "Avg H_e"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
z_axis_label <- "Length (Mb)"

# x_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# x_axis_label <- "Avg H_e"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# y_axis_label <- "Avg ROH-frequency (%)"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# z_axis_label <- "Length (Mb)"

# # Create and save the 3D scatter plot as a PNG file
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png", width = 1920, height = 1080, res = 300)
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png",width = 800, height = 600, res = 300)


# Create the 3D scatter plot
s3d <- scatterplot3d(
  x_value  ,
  y_value,
  z_value,
  color = Hotspots_and_Causative_windows_comparison_sorted$Color,
  pch = 19, # Solid circle
  xlab = x_axis_label,
  ylab = y_axis_label,
  zlab = z_axis_label,
  main = plot_title
)

# Add shadows on the x-y plane (z = 0)
s3d$points3d(
  x_value,
  y_value,
  rep(0, nrow(Hotspots_and_Causative_windows_comparison_sorted)),  # Set z to 0 for shadow
  col = adjustcolor(Hotspots_and_Causative_windows_comparison_sorted$Color, alpha.f = 0.3), # Semi-transparent shadows
  pch = 19
)

# Convert coordinates for adding labels
s3d.coords <- s3d$xyz.convert(
  x_value,
  y_value,
  z_value
)

# Add labels to the original points
text(s3d.coords$x, s3d.coords$y, 
     labels = Hotspots_and_Causative_windows_comparison_sorted$Label, 
     pos = 3, cex = 0.5) # pos=3 means above

# # Close the graphics device
# dev.off()
# Sort the data frame based on average fixation time, in descending order
 kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$ROH_Freq), ])
Model Lengths_Mb Length_lower_ci Length_upper_ci ROH_Freq ROH_freq_lower_ci ROH_freq_upper_ci H_e H_e_lower_ci H_e_upper_ci
8 s=0.6 2.05 1.57 2.53 97.0 94.9 99.1 0.079 0.056 0.101
10 s=0.8 2.00 1.56 2.45 94.4 90.4 98.4 0.124 0.075 0.174
9 s=0.7 2.10 1.54 2.67 93.7 87.1 100.4 0.127 0.069 0.185
7 s=0.5 1.38 1.02 1.74 92.0 86.6 97.4 0.138 0.097 0.178
5 s=0.3 1.26 1.00 1.53 90.9 84.6 97.2 0.103 0.052 0.154
6 s=0.4 1.15 0.79 1.50 89.8 85.0 94.6 0.119 0.078 0.159
3 s=0.1 0.72 0.55 0.90 77.2 68.4 86.0 0.141 0.093 0.189
4 s=0.2 1.01 0.77 1.25 76.6 69.1 84.1 0.147 0.113 0.181
2 Hotspot_chr28_window_1 1.40 NA NA 75.0 NA NA 0.118 NA NA
1 Hotspot_chr1_window_1 0.70 NA NA 56.2 NA NA 0.144 NA NA
 kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$H_e), ])
Model Lengths_Mb Length_lower_ci Length_upper_ci ROH_Freq ROH_freq_lower_ci ROH_freq_upper_ci H_e H_e_lower_ci H_e_upper_ci
4 s=0.2 1.01 0.77 1.25 76.6 69.1 84.1 0.147 0.113 0.181
1 Hotspot_chr1_window_1 0.70 NA NA 56.2 NA NA 0.144 NA NA
3 s=0.1 0.72 0.55 0.90 77.2 68.4 86.0 0.141 0.093 0.189
7 s=0.5 1.38 1.02 1.74 92.0 86.6 97.4 0.138 0.097 0.178
9 s=0.7 2.10 1.54 2.67 93.7 87.1 100.4 0.127 0.069 0.185
10 s=0.8 2.00 1.56 2.45 94.4 90.4 98.4 0.124 0.075 0.174
6 s=0.4 1.15 0.79 1.50 89.8 85.0 94.6 0.119 0.078 0.159
2 Hotspot_chr28_window_1 1.40 NA NA 75.0 NA NA 0.118 NA NA
5 s=0.3 1.26 1.00 1.53 90.9 84.6 97.2 0.103 0.052 0.154
8 s=0.6 2.05 1.57 2.53 97.0 94.9 99.1 0.079 0.056 0.101

Visualizing and scaling

# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)

# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  Hotspots_and_Causative_windows_comparison_sorted$Model
)

# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  "Hotspot", 
  "Selection Coefficient"
)

plot_title <- paste("Normalized ROH Hotspot(s) & Causative Windows comparison")


# Create the 3D scatter plot
s3d <- scatterplot3d(
  Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
  color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
  pch = 19, # Solid circle
  xlab = "Normalized Lengths",
  ylab = "Normalized Mean ROH Frequency",
  zlab = "Normalized H_e",
  main = plot_title
)

# Add labels to the points
s3d.coords <- s3d$xyz.convert(
  Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y, 
     labels = Hotspots_and_Causative_windows_comparison_sorted$Label, 
     pos = 3, cex = 0.5) # pos=3 means above

# Visualization for Selection Strength estimation

5.1 Standard Error CI for H_e

## Models where empirical H_e is within CI for hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.7" "s=0.8"
## 
## Models where empirical H_e is outside CI for hotspot_chr28_window_1 :
## [1] "s=0.6"
## Models where empirical H_e is within CI for hotspot_chr1_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.7" "s=0.8"
## 
## Models where empirical H_e is outside CI for hotspot_chr1_window_1 :
## [1] "s=0.6"

5.2 Standard Error CI for Length

## TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## [1] "s=0.1"
## 
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## [1] "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## [1] "s=0.3" "s=0.4" "s=0.5"
## 
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.6" "s=0.7" "s=0.8"

5.3 Standard Error CI for ROH Freq

## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2"
## 
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## [1] "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"